|
1
|
|
|
'use strict'; |
|
2
|
|
|
|
|
3
|
|
|
const Wappalyzer = require('./wappalyzer'); |
|
4
|
|
|
const url = require('url'); |
|
5
|
|
|
const fs = require('fs'); |
|
6
|
|
|
const path = require('path'); |
|
7
|
|
|
const Browser = require('zombie'); |
|
8
|
|
|
|
|
9
|
|
|
// Parsed contents of the apps.json file that sits next to this module.
// Read synchronously, once, when the module is first loaded.
const json = JSON.parse(fs.readFileSync(path.resolve(__dirname, 'apps.json'), 'utf8'));
|
|
|
|
|
|
10
|
|
|
|
|
11
|
|
|
// Pathnames considered crawlable pages: either no dot anywhere in the path
// (e.g. "/about/team") or a path ending in one of the listed page extensions
// (e.g. "/index.html"). The extension alternative must allow a prefix before
// the dot; anchoring it directly after "^" can never match a real pathname,
// because pathnames start with "/".
const extensions = /^(?:[^.]+|.+\.(?:asp|aspx|cgi|htm|html|jsp|php))$/;
|
12
|
|
|
|
|
13
|
|
|
/**
 * Crawls a website with a headless browser and hands every fetched page
 * (headers, HTML, script URLs, JS globals and cookies) to Wappalyzer.
 *
 * Typical use: `new Driver(pageUrl, options).analyze()`, which returns a
 * promise for `{ urls, applications, meta }`.
 */
class Driver {
  /**
   * @param {string} pageUrl - Address of the first page to analyse.
   * @param {Object} [options] - Overrides for the defaults listed below.
   */
  constructor(pageUrl, options) {
    this.options = Object.assign({}, {
      password: '',
      proxy: null,
      username: '',
      chunkSize: 5,
      debug: false,
      delay: 500,
      htmlMaxCols: 2000,
      htmlMaxRows: 3000,
      maxDepth: 3,
      maxUrls: 10,
      maxWait: 5000,
      recursive: false,
      userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
    }, options || {});

    // Flags pass through unary plus first, so 1, '1' and true enable them
    // while 0, '0', false and non-numeric strings such as 'true' do not.
    this.options.debug = Boolean(+this.options.debug);
    this.options.recursive = Boolean(+this.options.recursive);

    // The delay between requests only applies when crawling recursively.
    this.options.delay = this.options.recursive ? parseInt(this.options.delay, 10) : 0;
    this.options.maxDepth = parseInt(this.options.maxDepth, 10);
    this.options.maxUrls = parseInt(this.options.maxUrls, 10);
    this.options.maxWait = parseInt(this.options.maxWait, 10);
    this.options.htmlMaxCols = parseInt(this.options.htmlMaxCols, 10);
    this.options.htmlMaxRows = parseInt(this.options.htmlMaxRows, 10);

    this.origPageUrl = url.parse(pageUrl);
    this.analyzedPageUrls = [];
    this.apps = [];
    this.meta = {};

    this.wappalyzer = new Wappalyzer();

    this.wappalyzer.apps = json.apps;
    this.wappalyzer.categories = json.categories;

    this.wappalyzer.parseJsPatterns();

    // Route Wappalyzer's driver callbacks to this instance.
    this.wappalyzer.driver.log = (message, source, type) => this.log(message, source, type);
    this.wappalyzer.driver.displayApps = (detected, meta, context) => this.displayApps(detected, meta, context);

    // NOTE(review): this adds one process-wide listener per Driver instance
    // and turns every uncaught exception into a log line; consider
    // registering it once at module level instead.
    process.on('uncaughtException', e => this.wappalyzer.log('Uncaught exception: ' + e.message, 'driver', 'error'));
  }

  /**
   * Starts the timers used by `timer()` and crawls from the original URL.
   *
   * @returns {Promise<{urls: string[], applications: Object[], meta: Object}>}
   */
  analyze() {
    this.time = {
      start: new Date().getTime(),
      last: new Date().getTime(),
    };

    return this.crawl(this.origPageUrl);
  }

  /**
   * Writes a message to the console when the `debug` option is enabled.
   *
   * @param {string} message
   * @param {string} source - Component that produced the message.
   * @param {string} type - Message type, printed in the prefix.
   */
  log(message, source, type) {
    if (this.options.debug) {
      console.log('[wappalyzer ' + type + ']', '[' + source + ']', message);
    }
  }

  /**
   * Records detected applications, skipping names that were already recorded.
   *
   * @param {Object} detected - Detected applications keyed by name.
   * @param {Object} meta - Stored as-is on `this.meta`.
   */
  displayApps(detected, meta) {
    this.meta = meta;

    Object.keys(detected).forEach(appName => {
      const app = detected[appName];

      // One single-key object per category: { <id>: <category name> }.
      const categories = app.props.cats.map(id => ({ [id]: json.categories[id].name }));

      if (!this.apps.some(detectedApp => detectedApp.name === app.name)) {
        this.apps.push({
          name: app.name,
          confidence: app.confidenceTotal.toString(),
          version: app.version,
          icon: app.props.icon || 'default.svg',
          website: app.props.website,
          categories,
        });
      }
    });
  }

  /**
   * Visits one page after an index-proportional delay.
   *
   * @param {Object} pageUrl - Parsed URL; `href` identifies the page.
   * @param {number} index - Position within the current chunk; multiplies the delay.
   * @param {number} depth - Crawl depth, used for logging only.
   * @returns {Promise<Object[]|undefined>} Links found on the page, or
   *   undefined when the page was skipped or could not be analysed.
   */
  fetch(pageUrl, index, depth) {
    // Return when the URL is a duplicate or maxUrls has been reached
    if (this.analyzedPageUrls.indexOf(pageUrl.href) !== -1 || this.analyzedPageUrls.length >= this.options.maxUrls) {
      return Promise.resolve();
    }

    this.analyzedPageUrls.push(pageUrl.href);

    const timerScope = {
      last: new Date().getTime(),
    };
    const delay = this.options.delay * index;

    this.timer(`fetch; url: ${pageUrl.href}; depth: ${depth}; delay: ${delay}ms`, timerScope);

    // The promise is created inside the chain so that a synchronous throw
    // from visit() becomes a rejection instead of a promise that never settles.
    return this.sleep(delay)
      .then(() => new Promise(resolve => this.visit(pageUrl, timerScope, resolve)));
  }

  /**
   * Loads a page in a fresh browser, runs the analysis and reports the
   * page's crawlable links through `resolve`.
   *
   * `resolve` is always called once the browser has finished loading:
   * with the links on success, without arguments when the response is
   * rejected or when processing fails.
   *
   * @param {Object} pageUrl - Parsed URL of the page to visit.
   * @param {{last: number}} timerScope - Timer state shared with `fetch()`.
   * @param {Function} resolve - Settles the promise created by `fetch()`.
   */
  visit(pageUrl, timerScope, resolve) {
    const browser = new Browser({
      proxy: this.options.proxy,
      silent: true,
      strictSSL: false,
      userAgent: this.options.userAgent,
      waitDuration: this.options.maxWait,
    });

    browser.on('authenticate', auth => {
      auth.username = this.options.username;
      auth.password = this.options.password;
    });

    this.timer(`browser.visit start; url: ${pageUrl.href}`, timerScope);

    // Log the failure and settle anyway, so one bad page cannot stall the crawl.
    const fail = error => {
      this.wappalyzer.log((error && error.message) || String(error), 'driver', 'error');

      resolve();
    };

    browser.visit(pageUrl.href, () => {
      try {
        this.timer(`browser.visit end; url: ${pageUrl.href}`, timerScope);

        if (!this.responseOk(browser, pageUrl)) {
          return resolve();
        }

        const headers = this.getHeaders(browser);
        const html = this.getHtml(browser);
        const scripts = this.getScripts(browser);
        const js = this.getJs(browser);
        const cookies = this.getCookies(browser);

        this.wappalyzer.analyze(pageUrl, {
          headers,
          html,
          scripts,
          js,
          cookies,
        })
          .then(() => resolve(this.getLinks(browser)))
          .catch(fail);
      } catch (error) {
        fail(error);
      }
    });
  }

  /**
   * Collects the links of the loaded document that are worth crawling:
   * http(s) links on the original host whose pathname passes `extensions`.
   * The fragment of each matching link is cleared before it is parsed.
   *
   * @param {Object} browser
   * @returns {Object[]} Parsed URLs.
   */
  getLinks(browser) {
    return Array.prototype.reduce.call(
      browser.document.getElementsByTagName('a'),
      (results, link) => {
        if (link.protocol.match(/https?:/) && link.hostname === this.origPageUrl.hostname && extensions.test(link.pathname)) {
          link.hash = '';

          results.push(url.parse(link.href));
        }

        return results;
      },
      []
    );
  }

  /**
   * Returns the first browser resource that carries a response.
   *
   * @param {Object} browser
   * @returns {Object|null|undefined} The resource, or a falsy value when there is none.
   */
  getResponse(browser) {
    return browser.resources.length ? browser.resources.filter(resource => resource.response).shift() : null;
  }

  /**
   * Checks that the visit produced a 200 response with an HTML document.
   * Pages with a non-HTML content type are removed from the analysed URLs.
   *
   * @param {Object} browser
   * @param {Object} pageUrl - Parsed URL of the visited page.
   * @returns {boolean} True when the page can be analysed.
   */
  responseOk(browser, pageUrl) {
    // Validate response
    const resource = this.getResponse(browser);

    if (!resource) {
      this.wappalyzer.log(`No response from server; url: ${pageUrl.href}`, 'driver', 'error');

      return false;
    }

    if (resource.response.status !== 200) {
      this.wappalyzer.log(`Response was not OK; status: ${resource.response.status} ${resource.response.statusText}; url: ${pageUrl.href}`, 'driver', 'error');

      return false;
    }

    const headers = this.getHeaders(browser);

    // Validate content type
    const contentType = Object.prototype.hasOwnProperty.call(headers, 'content-type') ? headers['content-type'][0] : null;

    if (!contentType || !/\btext\/html\b/.test(contentType)) {
      this.wappalyzer.log(`Skipping; url: ${pageUrl.href}; content type: ${contentType}`, 'driver');

      // Only remove the URL when it is actually listed: splice(-1, 1)
      // would silently drop the last, unrelated entry.
      const position = this.analyzedPageUrls.indexOf(pageUrl.href);

      if (position !== -1) {
        this.analyzedPageUrls.splice(position, 1);
      }

      return false;
    }

    // Validate document
    if (!browser.document || !browser.document.documentElement) {
      this.wappalyzer.log(`No HTML document; url: ${pageUrl.href}`, 'driver', 'error');

      return false;
    }

    return true;
  }

  /**
   * Collects the headers of the first response.
   *
   * @param {Object} browser
   * @returns {Object<string, string[]>} Header values grouped by header name.
   */
  getHeaders(browser) {
    const headers = {};

    const resource = this.getResponse(browser);

    if (resource) {
      resource.response.headers._headers.forEach(header => {
        const name = header[0];

        if (!headers[name]) {
          headers[name] = [];
        }

        headers[name].push(header[1]);
      });
    }

    return headers;
  }

  /**
   * Returns the page's HTML, limited to `htmlMaxRows` lines of at most
   * `htmlMaxCols` characters. When the document is longer, the first and
   * last halves of the row budget are kept and the middle is dropped.
   *
   * @param {Object} browser
   * @returns {string} The trimmed HTML, or '' when it cannot be read.
   */
  getHtml(browser) {
    let html = '';

    try {
      const maxRows = this.options.htmlMaxRows;
      const maxCols = this.options.htmlMaxCols;

      let rows = browser.html().split('\n');

      if (rows.length > maxRows) {
        const head = Math.ceil(maxRows / 2);
        const tail = Math.floor(maxRows / 2);

        // slice(-0) would return every row, hence the explicit guard.
        rows = rows.slice(0, head).concat(tail > 0 ? rows.slice(-tail) : []);
      }

      html = rows
        .map(line => line.substring(0, maxCols))
        .join('\n');
    } catch (error) {
      this.wappalyzer.log(error.message, 'browser', 'error');
    }

    return html;
  }

  /**
   * Lists the `src` of every script element that has one.
   *
   * @param {Object} browser
   * @returns {string[]}
   */
  getScripts(browser) {
    if (!browser.document || !browser.document.scripts) {
      return [];
    }

    return Array.prototype.slice
      .apply(browser.document.scripts)
      .filter(script => script.src)
      .map(script => script.src);
  }

  /**
   * Looks up every property chain from Wappalyzer's JS patterns on the
   * page's window object.
   *
   * @param {Object} browser
   * @returns {Object} `js[appName][chain][patternIndex]` holds the value
   *   found for the chain: the string or number itself, or `true` for any
   *   other truthy value. Chains that resolve to a falsy value get no entries.
   */
  getJs(browser) {
    const patterns = this.wappalyzer.jsPatterns;
    const js = {};

    Object.keys(patterns).forEach(appName => {
      js[appName] = {};

      Object.keys(patterns[appName]).forEach(chain => {
        js[appName][chain] = {};

        // Walk the chain through own properties only; it depends solely on
        // the chain, so it is resolved once rather than once per pattern.
        let value = chain.split('.').reduce((parent, property) => {
          return parent && Object.prototype.hasOwnProperty.call(parent, property) ? parent[property] : null;
        }, browser.window);

        value = typeof value === 'string' || typeof value === 'number' ? value : !!value;

        if (!value) {
          return;
        }

        patterns[appName][chain].forEach((pattern, index) => {
          js[appName][chain][index] = value;
        });
      });
    });

    return js;
  }

  /**
   * Copies name, value, domain and path of every browser cookie.
   *
   * @param {Object} browser
   * @returns {Object[]}
   */
  getCookies(browser) {
    const cookies = [];

    if (browser.cookies) {
      browser.cookies.forEach(cookie => cookies.push({
        name: cookie.key,
        value: cookie.value,
        domain: cookie.domain,
        path: cookie.path,
      }));
    }

    return cookies;
  }

  /**
   * Fetches a page and, when recursion is enabled and the depth limit has
   * not been reached, crawls the links found on it.
   *
   * @param {Object} pageUrl - Parsed URL; gains a `canonical` property.
   * @param {number} [index=1] - Position used to stagger requests.
   * @param {number} [depth=1] - Current crawl depth.
   * @returns {Promise<{urls: string[], applications: Object[], meta: Object}>}
   */
  crawl(pageUrl, index = 1, depth = 1) {
    pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname;

    return this.fetch(pageUrl, index, depth)
      .catch(error => {
        // A failed page must not abort the crawl, but it should be visible.
        this.wappalyzer.log((error && error.message) || String(error), 'driver', 'error');
      })
      .then(links => {
        if (links && this.options.recursive && depth < this.options.maxDepth) {
          return this.chunk(links.slice(0, this.options.maxUrls), depth + 1);
        }

        return Promise.resolve();
      })
      .then(() => ({
        urls: this.analyzedPageUrls,
        applications: this.apps,
        meta: this.meta,
      }));
  }

  /**
   * Crawls `links` in consecutive chunks of `chunkSize`; the links within a
   * chunk are crawled in parallel. Consumes the `links` array.
   *
   * @param {Object[]} links - Parsed URLs still to crawl.
   * @param {number} depth - Depth passed on to `crawl()`.
   * @param {number} [chunk=0] - Number of chunks processed so far.
   * @returns {Promise<undefined>}
   */
  chunk(links, depth, chunk = 0) {
    if (links.length === 0) {
      return Promise.resolve();
    }

    // Take at least one link per round; a chunk size of 0 (or NaN) would
    // otherwise never shrink `links` and recurse forever.
    const size = this.options.chunkSize > 0 ? this.options.chunkSize : 1;
    const chunked = links.splice(0, size);

    return Promise.all(chunked.map((link, index) => this.crawl(link, index, depth)))
      .then(() => this.chunk(links, depth, chunk + 1));
  }

  /**
   * @param {number} ms - Milliseconds to wait; a falsy value resolves at once.
   * @returns {Promise<undefined>}
   */
  sleep(ms) {
    return ms ? new Promise(resolve => setTimeout(resolve, ms)) : Promise.resolve();
  }

  /**
   * Logs `message` with the seconds elapsed since the previous timer call
   * in `scope` and since `analyze()` started, then updates `scope.last`.
   *
   * @param {string} message
   * @param {{last: number}} scope
   */
  timer(message, scope) {
    const time = new Date().getTime();
    const sinceStart = (Math.round((time - this.time.start) / 10) / 100) + 's';
    const sinceLast = (Math.round((time - scope.last) / 10) / 100) + 's';

    this.wappalyzer.log(`[timer] ${message}; lapsed: ${sinceLast} / ${sinceStart}`, 'driver');

    scope.last = time;
  }
}
|
352
|
|
|
|
|
353
|
|
|
module.exports = Driver; |
|
354
|
|
|
|